;;###########################################################################
;; dataobj4.lsp
;; Copyright (c) 1991-2002 by Forrest W. Young
;; This file contains methods for exporting and importing data,
;; Methods rewritten Nov 2002 by Forrest W.Young
;;###########################################################################


(defun export-data (&optional file &key (variables t) (types t) (labels t))
"Args: (&optional file dont-save closing &key (variables t) (types t) (labels t))
FILE is a string naming the file which will contain the to-be exported data. If datasheet is open, editable, and edited, the datasheet is saved so that the dataobject is updated (if the dataobject has family, a new dataobject is created). Then, unless DONT-SAVE is T, the updated or new data-object is written to FILE.lsp as a flat ascii file. When VARIABLES is T the file will have variable names in the first line. Similarly, TYPES and LABELS specify that variable types and observation labels will also be saved. The data can be read in with the IMPORT DATA command.
CLOSING is t when datasheet being closed as well." 
  (cond 
    ((not *user-path*)
     (message-dialog (format nil "Sorry. No User Directory.~%You cannot export data.")))
    (t
     (send *current-data* :export-data file))))

(defmeth mv-data-object-proto :export-data (&optional file dont-save
closing &key (variables t) (types t) (labels t))
"Args: (&optional file dont-save closing &key (variables t) (types t) (labels t))
FILE is a string naming the file which will contain the to-be exported data. If datasheet is open, editable, and edited, the datasheet
is saved so that the dataobject is updated (if the dataobject has family, a
new dataobject is created). Then, unless DONT-SAVE is T, the updated or new
data-object is written to FILE.lsp as a flat ascii file. When VARIABLES is T the file will have variable names in the first line. Similarly, TYPES and LABELS specify that variable types and observation labels will also be saved. The data can be read in with the IMPORT DATA command.
CLOSING is t when datasheet being closed as well." 
  (let* ((closed t)
         (string "Export Data to File:")
         (dsob (send *current-data* :datasheet-object))
         (mattype (equal (string-downcase (send self :datatype)) "matrix"))
         (choices) 
         (varnames)
         (datarow)
         (nobs)
         )
    (if (and variables types labels)
        (setf choices '((0 1)))
        (setf choices (choose-subset-dialog "Exported Data Should Include:"
                                            (list "Variable Names and Types"
                                                  "Observation (Row) Labels"))))
    (when choices
          (setf choices (first choices))
          (setf choices (if (member 0 choices)
                            (if (member 1 choices) '(0 1 2) '(0 1))
                            (if (member 1 choices) '(2) '(nil))))
          (when dsob (send dsob :error-check))

          (when (not dont-save)
                (when (not (set-working-directory *user-dir-name*))
                      (set-working-directory "C:\\windows\\desktop"))
                (when
                 (not file) 
                 (setf file
                       #+macintosh(set-file-dialog string "" t)
                       #+msdos    (set-file-dialog string t 
                                       "Plain Text Files(*.TXT)|*.txt|All Files(*.*)|*.*")
                       #+X11    (if file (file-save-dialog string "*.txt" "." file )
                                    #+X11        (file-save-dialog string "*.txt" "."))
                       ))
                (when file (setf *user-dir-name* (get-working-directory))))
          (when (and (send self :datasheet-open)
                     (send (send current-data :datasheet-object) :editable))
                (setf closed (send (send self :datasheet-object) 
                                   :save-datasheet t closing)))

          (when (and file (not dont-save))
                (when closed
                      (when dsob (send dsob :save-datasheet-arguments))
                      (when (and *datasheet* (not (equal dsob *datasheet*)))
                            (send *datasheet* :save-datasheet-arguments))
                      (setf varnames (send *current-data* :active-variables '(all)))
                      (setf typenames (send *current-data* :active-types '(all)))
                      (setf datarow 
                            (if mattype
                                (send self :get-active-data-rows)
                                (row-list (send *current-data* :active-data-matrix '(all)))))
                      (setf nobs (length datarow))
                      (setf file (string-downcase-if-not-X11 file))
                      (when (member 2 choices) (setf labels (send self :active-labels)))
                      (when (> (length file) 3)
                            (cond
                              ((string= ".lsp" file :start2 (- (length file) 4))
                               (setf file (string-right-trim ".lsp" file)))
                              ((string= ".txt" file :start2 (- (length file) 4))
                               (setf file (string-right-trim ".txt" file)))))
                      (setf file (strcat (string file) ".txt"))
                      (format t "; Export: ~a~%> " file)
                      
                      (let ((f (open file :direction :output))
                            (oldbreak *breakenable*)
                            (asym?))

                        (setq *breakenable* nil)
                        
                        (unwind-protect
                         
                         (when (member 2 choices)
                               (setf varnames 
                                     (combine (strcat "ViSta:" (send self :name))
                                              varnames))
                               (if mattype 
                                   (setf asym? (not (not 
                                           (member "asymmetric" 
                                            (map-elements #'string-downcase (send $ :shapes))
                                            :test #'equal)))))
                               (setf typenames 
                                     (if mattype
                                         (combine (if asym? "Asymmetric" "Symmetric")
                                                  typenames)
                                         (combine (send self :datatype) typenames))))

                         (when (member 0 choices)                         
                               (mapcar #'(lambda (var) (format f "~s " var)) varnames)
                               (terpri f))
                         (when (member 1 choices)
                               (mapcar #'(lambda (var) (format f "~s " var)) typenames)
                               (terpri f))
                         (cond
                           (mattype
                            (let ((matrix-names (send self :active-matrices '(all)))
                                  (shapes (send self :active-shapes '(all)))
                                  (nmat (send self :nmat))
                                  (nvar (send self :nvar)))
                              (dotimes (j nmat)
                                       (dotimes (i nvar)
                                                (when (member 2 choices)
                                                      (format f "~s "
                                                              (strcat
                                                               (select matrix-names j)
                                                               ":"
                                                               (string-capitalize
                                                                (select shapes j))
                                                               ":"
                                                               (select labels i))))
                                                (mapcar #'(lambda (val) 
                                                            (format f "~s " val))
                                                        (coerce (select datarow i) 'list))
                                                (terpri f)))))
                           (t
                            (dotimes (i nobs) 
                                     (when (member 2 choices)
                                           (format f "~s " (select labels i)))
                                     (mapcar #'(lambda (val) 
                                                 (format f "~s " val))
                                             (coerce (select datarow i) 'list))
                                     (terpri f))
                            ))
                         (setq *breakenable* oldbreak)
                         (close f)
                         f)))))
    t))



(defmeth mv-data-object-proto :import-data (&optional file)
  (import-data file))


(defun import-data (&optional file)
"Args: (&optional FILE)
Imports a rectangular dataset from the file FILE. A dialog box is presented if FILE is NIL. A file whose contents are a rectangular dataset is a multiple record (line) file, where each record has the same number of elements, an element being a number, a symbol or a string. 
  A symbol is an unquoted group of alphanumeric characters containing no whitespace. A string is a double-quoted group of alphanumeric characters which may contain whitespace. Note that symbols are converted to upper case, whereas strings retain their original case. Missing data elements are represented by the symbol nil or NIL (\"nil\" (regardless of case) is interpreted as a string, not as a missing data-element. 
  A rectangular dataset has three types of records, called the NAMES, TYPES and DATA records. The NAMES record, which is first, specifies the name of the dataset (the first element on the record) and the name of each variable (the remaining elements). The TYPES record, which is second, defines the dataset type (first) and the types of each variable (the rest). The NAMES and TYPES records can only have elements which are character strings. The NAMES can be any string of characters. The data types can be one of the following: \"category\", \"univariate\", \"bivariate\", \"multivariate\", \"classification\", \"frequency\", \"freqclass\", \"crosstabs\", \"general\", \"missing\", \"symmetric\" or \"asymmetric\".  . The variable types can be either \"category\", \"ordinal\", or \"numeric\" Capitalization is ignored for all data and variable names.
  The rest of the records are all DATA records. Each data record contains an initial element called the LABEL. The LABEL is a character string or symbol that labels (names) the row of data. The rest of the elements on the record are the data values, there being one for each variable. A data value must be a number if the corresponding variable type is \"ordinal\", or \"numeric\", but can be any type of element if the variable type is \"category\"."

  (let* ((data) (name) (vista?) (plain?) (anon?) (untyped?) 
         (datatype) (matrix?) (nlists 0) (nelements 0)
         (datamat) (nrows) (ncols) (nvar) (nobs) (nmat) (flag) (ambiguous)
         (vartokens0) (vartokens1)
         (varnames) (vartypes) (obslabel) (matnames) (num-vartypes 0)
         (matshape) (datasheet?) (d) (datum) (types))
    

    ;READ FILE
    (setf data (import-data-columns))
    (when data (format t "; ImPort: ~a~%> " *read-data-file-name*))
    (unless data (fatal-message "FILE DOES NOT CONTAIN IMPORTABLE DATA. All lines of the file must have the same number of data-elements, each of which must be separated by white space."))


    ;GET DATA SIZES
    (setf nlists (length data))
    (setf nelements (length (first data)))
    
    
    ;GET FIRST TWO RECORD TYPES
    (setf 2records (first-two-records data))
;(one-button-dialog (format nil "~a " 2records))

    (setf vista? (and (equal (first 2records)   "vistaname")
                      (equal (second 2records)  "vistatype")))

    (setf anon? (and (equal (first 2records)    "name")
                       (equal (second 2records) "type")))

    (setf plain? (and (equal (first 2records)   "data")
                      (equal (second 2records)  "data")))

    (setf untyped? (and (equal (first 2records) "name")
                       (equal (second 2records) "data")))

    (setf export-file-type (cond (vista? "ViSta Export") (plain? "Flat Text")
                             (anon? "Anon Export") (untyped? "Untyped Export")))

    (setf name (get-imported-data-name (select (select data 0) 0) (or plain? untyped?)))
    (setf name (first name))



    ;GET FIRST COLUMN VARIABLE TYPE

    (cond
      ((or vista? anon?)
       (setf labeltype (if (every 'numberp (rest (rest (first data))))
                           "Numeric" "Category")))
      (plain?
       (setf labeltype (if (every 'numberp (rest (first data)))
                           "Numeric" "Category")))
      (untyped?
       (setf labeltype (if (every 'numberp (first data))
                           "Numeric" "Category"))))


    ;IMPORT DATA

    (format t "; Import: ~a Data File [~d records, ~d columns]~%> "
           export-file-type nelements nlists)
    (cond
      (vista? (import-vista-data name data labeltype))
      (plain? (import-plain-data name data labeltype)) 
      (anon?  
       (setf vartypes (second (transpose data)))
       (import-anon-data name data vartypes labeltype))
      (untyped? (import-untyped-data name data labeltype)))
    (send $ :info)
    ))


;FUNCTIONS TO IMPORT DATA

(defun import-vista-data (name data labeltype)
  (let* ((missing?)
         (nlists (length data))
         (nelements (length (first data)))
         (data (transpose (matrix (list nlists nelements) (combine data))))
         (nvar (- nlists 1))
         (nobs (- nelements 2))
         (datatype (string-downcase (select data 1 0)))
         (varnames (combine (select data 0 (iseq 1 nvar))))
         (vartypes (combine (select data 1 (iseq 1 nvar))))
         (obslabels (combine (select data (iseq 2 (1+ nobs)) 0)))
         (data      (combine (select data (iseq 2 (1+ nobs)) (iseq 1 nvar))))
         (freq?     (or (equal datatype "frequency")
                        (equal datatype "freqclass")
                        (equal datatype "crosstabs")))
         (matrix?  (or (equal datatype "symmetric")
                       (equal datatype "asymmetric")))
         (matnames  (if matrix? (remove-duplicates (extract-mat-names obslabels) 
                                                   :test #'equal) nil))
         (matshapes (if matrix? (extract-mat-shapes obslabels) nil))
         )
    (make-data-object name data varnames 
                      :types vartypes 
                      :labels obslabels 
                      :datatype datatype
                      :missing? missing? 
                      :freq? freq? 
                      :matnames matnames)
    ))

(defun import-anon-data (name data ambiguous-vartypes labeltype)
  (let* ((missing?)
         (datatype)
         (nvar (- (length data) 1))
         (nobs (- (length (first data)) 2))
         (legit-types (list "category" "numeric" "ordinal"
                            'category  'numeric  'ordinal))
         (vartypes
          (select 
           (mapcar #'(lambda (var type)
                       (unless missing? (setf missing? (position 'nil var)))
                       (if (member (string-downcase type) legit-types :test #'equal) type
                           nil))
                   data ambiguous-vartypes)
           (iseq 1 nvar)))
         (varnames
          (select
           (mapcar #'(lambda (var) 
                       (cond
                         ((stringp (first var)) (first var))
                         ((symbolp (first var)) (format nil "~s" (first var)))
                         ((numberp (first var)) (format nil "~s" (first var)))
                         (t (format nil "Var~a" (1+ x)))))
                   data)
           (iseq 1 nvar)))
         (obslabels 
             (mapcar #'(lambda (element)
                         (format nil "~a" element))
                     (select (select data 0) (iseq 2 (+ nobs 1)))))
         (data (combine (transpose 
                         (select (matrix (list nvar (+ nobs 2))  
                                         (combine (select data (iseq 1 nvar))))
                                 (iseq nvar) (iseq 2 (+ nobs 1))))))
         (result (datatype-dialog))
         (freq? (= (first result) 1))
         (matrix? (= (first result) 2))
         (nmat (if matrix? (/ nobs nvar)))
         (matnames))
    (when (and matrix? (not (integerp nmat)))
          (error-message (format nil "THIS FILE CONTAINS MATRIX DATA WHICH CANNOT BE IMPORTED.   ~%The number of observations (which is ~d) must be an integer multiple of the number of variables (which is ~d)." nobs nvar)))
    (setf matnames (if matrix? (mapcar #'(lambda (x) (format nil "Matrix~a" (1+ x)))
                                       (iseq (/ nobs nvar)))))
    ;(setf datatype (generalized-datatype vartypes freq? nil missing? matrix?)) 
    (make-data-object name data varnames 
                      :types vartypes 
                      :labels obslabels 
                     ; :datatype datatype
                      :missing? missing? 
                      :freq? freq? 
                      :matnames matnames)
    ))


(defun import-untyped-data (name data labeltype)
  (let* ((missing?)
         (ambiguous)
         (nvar (length data))
         (nobs (- (length (first data)) 1))
         (result (datatype-dialog))
         (error)
         (varnames)
         (justdata)
         (vartypes
          (mapcar #'(lambda (var)
                      (unless missing? (setf missing? (position 'nil var)))
                      (setf varnames (append varnames (list (first var))))
                      (setf justdata (append justdata (rest var)))
                      (cond
                        ((every 'numberp (rest var)) "Numeric")
                        ((not (some  'numberp (rest var))) "Category")
                        (t (setf error t) "Mixed")))
                  data))
         (obslabels 
          (mapcar #'(lambda (x) (format nil "Obs~a" (1+ x))) (iseq nobs)))
         (data (combine (transpose (matrix (list nvar nobs) justdata))))
         (freq? (= (first result) 1))
         (matrix? (= (first result) 2))
         (nmat (if matrix? (/ nobs nvar)))
         (matnames))
    (when error 
          (error-message (format nil "THIS FILE IS STRUCTURED WRONG: CANNOT IMPORT DATA."))
          (TOP-LEVEL))
    (when (and matrix? (not (integerp nmat)))
          (error-message (format nil "THIS FILE CONTAINS MATRIX DATA WHICH CANNOT BE IMPORTED.   ~%The number of observations (which is ~d) must be an integer multiple of the number of variables (which is ~d)." nobs nvar))
          (TOP-LEVEL))
    (setf matnames (if matrix? (mapcar #'(lambda (x) (format nil "Matrix~a" (1+ x)))
                                       (iseq (/ nobs nvar)))))
   ; (setf datatype (generalized-datatype vartypes freq? nil missing? matrix?))
    (make-data-object name data varnames 
                      :types vartypes 
                      :labels obslabels 
                     ; :datatype datatype
                      :missing? missing? 
                      :freq? freq? 
                      :matnames matnames)
    ))



(defun import-plain-data (name data labeltype)
  (let* ((missing?)
         (ambiguous)
         (nvar (length data))
         (nobs (length (first data)))
         (result (datatype-dialog))
         (error)
         (vartypes
          (mapcar #'(lambda (var)
                      (unless missing? (setf missing? (position 'nil var)))
                      (cond
                        ((every 'numberp var) "Numeric")
                        ((every 'numberp (rest var)) 
                         (setf error t)
                         "AllButFirstNumeric")
                        ((not (some  'numberp var)) "Category")
                        (t (setf error t) "Mixed")))
                  data))
         (varnames 
          (mapcar #'(lambda (x) 
                      (format nil "Var~a" (1+ x))) 
                  (iseq nvar)))
         (obslabels 
          (mapcar #'(lambda (x) (format nil "Obs~a" (1+ x))) (iseq nobs)))
         (data (combine (transpose (matrix (list nvar nobs) (combine data)))))
         (freq? (= (first result) 1))
         (matrix? (= (first result) 2))
         (nmat (if matrix? (/ nobs nvar)))
         (matnames))
    (when error 
          (error-message (format nil "THIS FILE IS STRUCTURED WRONG: CANNOT IMPORT DATA.       ~%There must be two lines before the data to specify names and types."))
          (TOP-LEVEL))
    (when (and matrix? (not (integerp nmat)))
          (error-message (format nil "THIS FILE CONTAINS MATRIX DATA WHICH CANNOT BE IMPORTED.   ~%The number of observations (which is ~d) must be an integer multiple of the number of variables (which is ~d)." nobs nvar))
          (TOP-LEVEL))
    (setf matnames (if matrix? (mapcar #'(lambda (x) (format nil "Matrix~a" (1+ x)))
                                       (iseq (/ nobs nvar)))))
   ; (setf datatype (generalized-datatype vartypes freq? nil missing? matrix?))
    (make-data-object name data varnames 
                      :types vartypes 
                      :labels obslabels 
                     ; :datatype datatype
                      :missing? missing? 
                      :freq? freq? 
                      :matnames matnames)
    ))


;SUPPORTING FUCTIONS

(defun extract-mat-names (labels)
  (mapcar #'(lambda (label)
              (setf pstn (position #\: label :test #'equal))
              (select label (iseq pstn)))
          labels))

(defun extract-mat-shapes (labels)
  (mapcar #'(lambda (label)
              (setf pstn (position #\: label :test #'equal))
              (select label (iseq (+ 1 pstn) (+ 4 pstn))))
          labels))

(defun first-two-records (data)
  (let* ((tdata (transpose data))
         (all-s1?              (all-strings-or-symbols? (first  tdata)))
         (all-s2?              (all-strings-or-symbols? (second tdata)))
         (rest-n1?             (all-numeric? (rest      (first  tdata))))
         (rest-n2?             (all-numeric? (rest      (second tdata))))
         (all-vt1? (if all-s1? (all-vartypes? (rest (first tdata))) nil))
         (all-vt2? (if all-s2? (all-vartypes? (rest (first tdata))) nil))
         (dt?      (all-datatypes? (list (select (select tdata 1) 0))))
         )
    (list (list all-s1? rest-n1? all-vt1? dt?)
          (list all-s1? rest-n1? all-vt1? dt?))))
        
        
(defun record-type? (record)
  (let* ((all-s?  (all-strings-or-symbols? record))
         (all-vt? (if all-s? (all-vartypes? (rest record)) nil))
         (vn?     (if all-s? (vistaname? (first record))))
         (dt?     (if (and all-s? (not vn?)) (all-datatypes? (list (first record)))))
         )
    (cond 
      ((and all-s? dt? all-vt?) "vistatype")
      ((and all-s? vn?) "vistaname")
      ((and all-s? (not all-vt?)) "name")
      ((and all-s? all-vt?) "type")
      (t "data"))))

(defun first-two-records (data)
  (let* ((tdata (transpose data))
         )
    (list (record-type? (first tdata)) (record-type? (second tdata)))))

(defun import-data-columns (&optional (file (open-file-dialog t 
                                            "Plain Text Files(*.TXT)|*.txt|All Files(*.*)"
                                            "Import Data"))
                                    (cols (if file (count-file-columns file))))
"Args: (&optional file cols)
Reads the data in FILE as COLS columns and returns a list of lists representing the columns."
  (if (not file) 
      (top-level nil)
      (when (not cols) 
            (fatal-message "FILE DOES NOT CONTAIN RECTANGULAR INFORMATION. All lines of the file do not have the same number of data-elements") (top-level nil)))
  (transpose (split-list (read-data-file file) cols)))


(defun get-imported-data-name (name &optional flag)
"Gets best guess as to dataset name, based on nature of NAME and on the name of the file containing the dataset. Returns name and a logical value indiating whether ViSta created this file."
  (let* ((filename (string-capitalize (pathname-name *read-data-file-name*)))
         (vista?   (if (and name (stringp name) (> (length name) 4))
                       (equal (string-downcase (select name (iseq 5))) "vista")))
         (name     (cond
                     (flag filename)  
                     ((not name) filename)
                     ((symbolp name) (string-downcase name))
                     ((not (stringp name)) filename)
                     (vista? (reverse (select (reverse name) (iseq (- (length name) 6)))))
                     ((equal (string-downcase name) "labels/names") filename)
                     ((equal (string-downcase name) "labels") filename)
                     (t name))))
    (list name vista?)))

(defun all-strings? (list)
  (every 'stringp list))

(defun all-symbols? (list)
  (every 'symbolp list))

(defun all-strings-or-symbols? (list)
  (let* ((where-strings (where 'stringp list))
         (where-symbols (where 'symbolp list)))
    (= (length (union where-strings where-symbols)) (length list))))

(defun all-numeric? (list)
  (every 'numberp list))


    
(defun all-vartypes? (list)
  (let* ((legit-types (list "category" "numeric" "ordinal"
                            'category  'numeric  'ordinal))
         (tnil (mapcar #'(lambda (element)
                           (not (not (member (string-downcase element)
                                             legit-types :test #'equal))))
                       list)))
    (setf tnil (remove-duplicates tnil))
    (and (= (length tnil) 1) (first tnil))))


(defun symbol-from-string (string)
     (intern (string-upcase string)))

(defun vistaname? (name)
  (and name (stringp name) (> (length name) 4)
       (equal (string-downcase (select name (iseq 5))) "vista")))

(defun all-datatypes? (list)
  (let* ((legit-types (combine (possible-datatypes) "symmetric" "asymmetric"))
         (legit-types (combine legit-types 
                               (map-elements #'symbol-from-string legit-types)))
         (tnil (mapcar #'(lambda (element)
                           (not (not (member (string-downcase element)
                                             legit-types :test #'equal))))
                       list)))
    (setf tnil (remove-duplicates tnil))
    (and (= (length tnil) 1) (first tnil))))


(defun make-data-object (name datalist variables &key types labels datatype
                              missing? freq? matnames matshapes )
"Args: name datalist variables &key types freq? labels matnames matshapes missing? 
Makes NAME, a ViSta Data Object, from DATALIST, an observation-major-order list of data, and VARIABLES, a list of variable names. All variables are assumed to be numeric unless TYPES is used to specify whether variables are numeric, ordinal or category. FREQ? must be T when the data are frequencies. Default observation labels (Obs1 Obs2, etc.) are used unless LABELS is included. MATNAMES and MATSHAPES must be used when the data are matrices. MISSING? may be set to T to return computation time when there are missing values."
  (data name
        :data      datalist
        :variables variables
        :types     types
        :labels    labels
        :freq      freq?
      ; :datatype  datatype
        :matrices  matnames
        :shapes    matshapes
        :missing   missing?))

(defun datatype-dialog ()
  (let* ((data-type-text  (send text-item-proto :new "PLEASE IDENTIFY THE BASIC NATURE OF  THESE DATA:"))
         (data-type       (send choice-item-proto :new 
                                (list "They measure quantity or ammount" 
                                      "They are counts or frequency measures" 
                                      "The are measures of association")))
         (ok (send modal-button-proto :new "Import"
                   :action #'(lambda () 
                               (list
                                (send data-type :value)))))
         (cancel (send modal-button-proto :new "Cancel"))
         (help   (send button-item-proto :new "Help" 
                       :action #'(lambda () 
                                   (get-menu-item-help IMPORT-DATA-FILE-MENU-ITEM))))
         (dialog (send modal-dialog-proto :new
                       (list data-type-text  data-type
                             (list ok cancel help))
                       :title "Import Data"))
         (result nil)
         )
    (setf result (send dialog :modal-dialog))
    result))
  